Jtm.php
<?php
namespace Tlf;
/**
* Json To Mysql class
*/
class Jtm {
/**
* Set `true` to disable foreign key checks during INSERTs
*/
public bool $disable_foreign_key_checks = false;
/**
* Convert a json line into an array
*
* @param $line json that ends with `},` (bc there is another row
*/
public function json_line_to_array(string $line): ?array {
if (trim($line)=='[')return null;
if (trim($line)==']')return null;
$line = substr($line,0,strrpos($line,'}')+1);
$row = json_decode($line, true);
return $row;
}
/**
* check if a value is a date
* @param $value a value to check for a date format
*/
public function is_date($value): bool{
if (!is_string($value))return false;
return preg_match('/[0-9]{4}\-[0-9]{2}\-[0-9]{2}/', $value);
}
/**
*
* convert column names to camel_case
*
* @param $dirty_schema, as generated by `generate_schema()`
*
*/
public function clean_schema(array $dirty_schema): array {
// print_r($dirty_schema);
$clean_schema = [];
foreach ($dirty_schema as $col=>$info){
$json_key = $col;
$dirty = $col;
$dirty = preg_replace('/[^a-zA-Z_]/','_', $dirty); //replace bad chars with underscore
$dirty = preg_replace('/([A-Z]+)/','_$1', $dirty); // replace uppercase letters with `_LETTER`
$dirty = preg_replace('/_+/', '_', $dirty); // replace multiple underscores
if (substr($dirty,0,1)=='_')$dirty = substr($dirty,1); // remove first char if underscore
$dirty = strtolower($dirty); // lowercase string
$col_name = $dirty;
$info['name'] = $col_name;
$info['json_key'] = $col;
$clean_schema[$col_name] = $info;
}
// echo "\n\n\n-----------\n\n";
// echo "\n\n\n-----------\n\n";
// print_r($clean_schema);
// exit;
return $clean_schema;
}
/**
* @param $prefix the file prefix as passed to @see(generate_sql_insert())
* @param $pdo a pdo instance to use to execute the sql
*
* @return total number of rows inserted
*/
public function execute_insert_files(string $prefix, \PDO $pdo): int{
$dir = dirname($prefix);
$name = basename($prefix);
$insert_prefix = $name.'-insert';
$prefix_len = strlen($insert_prefix);
$len = strlen($name);
$files = [];
foreach (scandir($dir) as $file){
if (substr($file,0,$prefix_len)!=$insert_prefix)continue;
$end_pos = strrpos('-', $file);
// file is like $prefix-insert-NUMBER.sql
// trim off .sql & everything before NUMBER
$index = substr($file, $end_pos);
$index = substr($file,0,-4);
$index = (int)$index;
$files[$index] = $file;
}
ksort($files);
$total_rows = 0;
foreach ($files as $f){
$sql = file_get_contents($dir.'/'.$f);
// echo "\n\n\n-----------\n\n";
// echo $sql;
// echo "\n\n\n-----------\n\n";
// $stmt = $pdo->prepare($sql);
// $num_rows = $stmt->execute();
$num_rows = $pdo->exec(trim($sql));
$total_rows += $num_rows;
}
return $total_rows;
}
/**
*
* @param $source_file the json file to parse for a schema
*/
public function generate_schema(string $source_file, array $schema_info): array {
$schema = [];
foreach ($schema_info as $col => $data){
$maxLen = $data['maxlen'];
if ($maxLen > 1000){
$type = "TEXT";
} else if ($data['is_date']>0){
$type = 'DATE';
} else if ($data['is_string']>0
&&$data['is_string']>$data['is_numeric']
|| $data['is_array'] > 0
&& $data['is_array'] > $data['is_numeric']
) {
$len = $maxLen + 50;
$type = "VARCHAR({$len})"; //or varchar
} else if ($data['is_float']>0
&& $data['is_float'] > $data['is_int']
){
$type = "FLOAT";
} else if ($data['is_bool']>0){
$type = "BOOL";
} else {
$len = $maxLen;
if ($len < 10)$len = 10;
$type = "int({$len})";
}
$schema[$col] = [
'name'=> $col,
'type'=> $type,
];
}
return $schema;
}
public function generate_schema_info($source_file){
$schemaData = [];
$sampleData = [
'maxlen'=> 0,
'minlen'=> false,
'count' => 0,
'is_string' => 0,
'is_numeric' => 0,
'is_int' => 0,
'is_float'=> 0,
'is_bool' => 0,
'is_array'=> 0,
'is_date' => 0,
'is_json' => 0,
'sampleValue' => '',
];
//loop over every row
//modify keys of schemaDataArray
// finally json_encode & output the schema data to an info file <- For the developer to inspect their input data
// json_encode & output an actual schema-file that details how to create the table.
$reader = fopen($source_file, 'r');
while ($line = fgets($reader)) {
$row = $this->json_line_to_array($line);
if ($row===null)continue;
foreach ($row as $column => $value){
$data = $schemaData[$column] ?? $sampleData;
$data['count']++;
$valueForLen = is_array($value) ? json_encode($value) : $value;
$len = strlen($valueForLen??'');
if ($len===false || $data['maxlen'] > $len){
$data['maxlen'] = $data['maxlen'];
} else {
$data['maxlen'] = $len;
$data['sampleValue'] = substr($valueForLen??'', 0, 1000);
}
$data['minlen'] = ($len == false || $data['minlen'] < $len) && $data['minlen'] !== false ? $data['minlen'] : $len;
$data['is_string'] += is_string($value) ? 1 : 0;
$data['is_bool'] += is_bool($value) ? 1 : 0;
$data['is_array'] += is_array($value) ? 1 : 0;
$data['is_date'] += $this->is_date($value) ? 1 : 0;
if (is_numeric($value)){
$data['is_numeric'] += 1;
$data['is_int'] += is_int($value) || (int)$value==$value ? 1 : 0;
$data['is_float'] += is_float($value) || (float)$value==$value ? 1 : 0;
}
if (is_string($value)){
$json = json_decode($value, true);
$data['is_json'] += ($json===null) ? 0 : 1;
}
$schemaData[$column] = $data;
}
}
fclose($reader);
return $schemaData;
}
/**
* @param $tableName name of table to create
* @param $schema schema ... @see(generate_schema())
*/
public function generate_sql_create(string $tableName, array $schema, bool $dropIfExists=true){
$colStatements = [];
foreach ($schema as $col => $info){
$statement = '`'.$info['name'].'` '.$info['type'];
$colStatements[] = $statement;
}
$colsSql = implode(", ", $colStatements);
$drop = $dropIfExists ? "DROP TABLE IF EXISTS `{$tableName}`;\n" : '';
$sql =
<<<SQL
{$drop}
CREATE TABLE `{$tableName}`
(
{$colsSql}
)
;
SQL;
return $sql;
}
/**
*
* Generate sql for INSERTs and write it to disk
*
* @param $source_file path to the source json file
* @param $target_dir the directory to write the sql-insert files to
* @param $tableName name of table to insert into
* @param $schema the schema generated by @see(generate_schema())
* @param $chunkByLength Maximum `strlen()` of any output sql
* @param ?$rowModifier a function that accepts `array $row` and returns (optionally modified) `array` $row
*/
public function generate_sql_insert(string $source_file, string $out_prefix, string $tableName, array $schema, $chunkByLength, ?callable $rowModifier = null){
$pdo = new \PDO('sqlite::memory:');
$reader = fopen($source_file, 'r');
/** @key should be to access it in the json. @value should be the database column name */
$columns = [];
foreach ($schema as $col_name=>$info){
$json_key = $info['json_key'] ?? $col_name;
$columns[$json_key] = $col_name;
}
// $cols = array_combine(array_keys($schema), array_keys($schema));
// $colsSql = '`'.implode('`,`', $cols).'`';
$colsSql = '`'.implode('`,`', $columns).'`';
$fk = $this->disable_foreign_key_checks
? "SET FOREIGN_KEY_CHECKS=0;"
: "";
$insert =
<<<SQL
$fk
INSERT INTO `{$tableName}`
( {$colsSql} )
VALUES
SQL;
//goto here
$rowCount = 0;
$fileIndex = 0;
$writer = null;
$rows = [];
$len = 0;
while ($line = fgets($reader)) {
$row = $this->json_line_to_array($line);
if ($row==false)continue;
$rowCount++;
$row = array_filter($row,
// function($value, $key) use ($cols){
function($value, $key) use ($columns){
return isset($columns[$key]);
}, ARRAY_FILTER_USE_BOTH
);
// var_dump($row);
// exit;
if ($rowModifier!=null){
$row = $rowModifier($row);
}
$row = array_map(
function($value) use ($pdo){
if (is_array($value)){
$value = json_encode($value);
}
if (is_string($value))$value = $pdo->quote($value);
else if (strlen($value??'')==0)$value = 'NULL';
// $value = '"'.substr($value,1,-1).'"';
// return "{$value}";// Why am I returning it like that?
return $value;
}, $row
);
$data = [];
// var_dump($schema);
// exit;
foreach ($schema as $info){
$key = $info['json_key'] ?? $info['name'];
$data[] = $row[$key] ?? '0';
}
// echo "\n\n\n-----------\n\n";
// var_dump($data);
// echo "\n\n\n-----------\n\n";
// var_dump($row);
// echo "\n\n\n-----------\n\n";
// exit;
// $leadComma = $rowCount===1 ? '' : ',';
$rowSql = "\n".'( '. implode(', ',$data).')';
// echo "\n$rowCount";
if ($writer === null || $chunkByLength!==false && $len + strlen($rowSql) > $chunkByLength){
$len = 0;
// finish what we have. This line gets written to next file
// file name increments by 1
if ($writer!==null){
fwrite($writer, "\n;\n");
fclose($writer);
}
if (substr($out_prefix,-1)!='/')$path_dir = dirname($out_prefix);
else $path_dir = $out_prefix;
if (!is_dir($path_dir))mkdir($path_dir,0751, true);
$writer = fopen($out_prefix."-insert-{$fileIndex}.sql", 'w');
fwrite($writer, $insert);
$fileIndex++;
$rowCount = 1;
$len = strlen($insert);
}
if ($rowCount!==1){
$rowSql = ",".$rowSql;
}
fwrite($writer,$rowSql);
$len += strlen($rowSql);
}
// $writer will be null if the source json file contains no rows of data.
if ($writer!==null){
fwrite($writer, "\n;\n");
if ($this->disable_foreign_key_checks){
fwrite($writer, "SET FOREIGN_KEY_CHECKS=0;\n");
}
fclose($writer);
}
fclose($reader);
}
}